/*
 * Copyright 2015 Alexey Chernov <4ernov@gmail.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * digin_gen(), grisu() functions and kappa precalculation approach
 * were greatly influenced by
 * Florian Loitsch's original Grisu algorithms implementation
 * (http://florian.loitsch.com/publications/bench.tar.gz),
 * "Printing Floating-Point Numbers Quickly and Accurately with
 * Integers" paper
 * (http://florian.loitsch.com/publications/dtoa-pldi2010.pdf)
 * and Milo Yip's Grisu implementation
 * (https://github.com/miloyip/dtoa-benchmark)
 */

#ifndef FLOAXIE_GRISU_H
#define FLOAXIE_GRISU_H

#include <utility>
#include <limits>
#include <cstring>
#include <cstddef>
#include <cassert>

#include "diy_fp.h"
#include "cached_power.h"
#include "k_comp.h"
#include "static_pow.h"
#include "integer_of_size.h"
#include "bit_ops.h"
#include "memwrap.h"

namespace floaxie
{
	/** \brief Half of `diy_fp::mantissa_storage_type` to use in calculations.
	 */
	typedef std::uint32_t half_of_mantissa_storage_type;

	/** \brief `std::pair` containing exponent value and the corresponding
	 * power of 10.
	 */
	typedef std::pair<unsigned char, half_of_mantissa_storage_type> kappa_pair_type;

	/** \brief Bind exponent value and the corresponding power of 10. */
	template<unsigned char pow> constexpr kappa_pair_type make_kappa_div()
	{
		return kappa_pair_type(pow, static_pow<10, pow - 1>());
	}

	/** \brief Ground specialization of `make_kappa_div()` template. */
	template<> constexpr kappa_pair_type make_kappa_div<0>()
	{
		return kappa_pair_type(0, 1);
	}

	/** \brief Calculate the biggest power of 10 divisor for the specified
	 * value.
	 */
	inline kappa_pair_type calculate_kappa_div(half_of_mantissa_storage_type n) noexcept
	{
		if (n < static_pow<10, 1>()) return make_kappa_div<1>();
		if (n < static_pow<10, 2>()) return make_kappa_div<2>();
		if (n < static_pow<10, 3>()) return make_kappa_div<3>();
		if (n < static_pow<10, 4>()) return make_kappa_div<4>();
		if (n < static_pow<10, 5>()) return make_kappa_div<5>();
		if (n < static_pow<10, 6>()) return make_kappa_div<6>();
		if (n < static_pow<10, 7>()) return make_kappa_div<7>();
		if (n < static_pow<10, 8>()) return make_kappa_div<8>();
		if (n < static_pow<10, 9>()) return make_kappa_div<9>();
		return make_kappa_div<10>();
	}

	/** \brief Defines the maximum possible number of digits to be generated by
	 * the algorithm.
	 *
	 * \tparam FloatType floating point type to calculate the maximum number for.
	 */
	template<typename FloatType> constexpr std::size_t max_digits() noexcept
	{
		return std::numeric_limits<half_of_mantissa_storage_type>::digits10 +
			std::numeric_limits<typename diy_fp<FloatType>::mantissa_storage_type>::digits10;
	}

	template<bool positive_exponent> struct digit_gen_select;


	/** \brief Template class to select `digit_gen` implementation avoiding
	 * function template partial specialization.
	 */
	template<> struct digit_gen_select<false>
	{
		/** \brief Function of digit generation for the case of α < 0 and γ < 0.
		 *
		 * This is probably the fastest `digit_gen()` algorithm, which implies,
		 * that both **M+** and **M-** exponents are not positive.
		 *
		 * Implemented as static member to solve the problem of partial specialization
		 * of `digit_gen<bool, FloatType, CharType>()`.
		 *
		 * \tparam FloatType floating point type of `diy_fp` (`float` for single precision,
		 * `double` for double precision) of \p **Mm** and \p **Mp**.
		 * \tparam CharType character type (typically `char` or `wchar_t`) of \p **buffer**.
		 *
		 * \see [Printing Floating-Point Numbers Quickly and Accurately with
		 * Integers]
		 * (http://florian.loitsch.com/publications/dtoa-pldi2010.pdf)
		 */
		template<typename FloatType, typename CharType>
		inline static void gen(const diy_fp<FloatType>& Mp, const diy_fp<FloatType>& Mm, CharType* buffer, int* len, int* K) noexcept
		{
			assert(Mp.exponent() <= 0);

			const diy_fp<FloatType>& delta(Mp - Mm);

			const diy_fp<FloatType> one(raised_bit<typename diy_fp<FloatType>::mantissa_storage_type>(-Mp.exponent()), Mp.exponent());

			half_of_mantissa_storage_type p1 = Mp.mantissa() >> -one.exponent();
			typename diy_fp<FloatType>::mantissa_storage_type p2 = Mp.mantissa() & (one.mantissa() - 1);

			assert(p1 || p2);

			*len = 0;

			auto delta_f = delta.mantissa();

			const bool close_to_delta = p2 <= delta_f;

			if (p1)
			{
				auto&& kappa_div(calculate_kappa_div(p1));

				unsigned char& kappa(kappa_div.first);
				half_of_mantissa_storage_type& div(kappa_div.second);

				while (kappa > 0 && p1)
				{
					buffer[(*len)++] = '0' + p1 / div;

					p1 %= div;

					kappa--;
					div /= 10;
				}

				if (close_to_delta)
				{
					*K += kappa;
					return;
				}
				else
				{
					wrap::memset(buffer + (*len), CharType('0'), kappa);
					(*len) += kappa;
				}
			}

			const bool some_already_written = (*len) > 0;
			unsigned char kappa(0);

			while (p2 > delta_f)
			{
				p2 *= 10;

				const unsigned char d = p2 >> -one.exponent();

				if (some_already_written || d)
					buffer[(*len)++] = '0' + d;

				p2 &= one.mantissa() - 1;

				++kappa;
				delta_f *= 10;
			}

			*K -= kappa;
		}
	};


	/** \brief Digit generation function template.
	 *
	 * Digit generation algorithm tries to find the value in the range of
	 * **(M-, M+)** with the shortest possible string representation, which is
	 * then treated as the representation of the original value, since it
	 * converts back to it.
	 *
	 * General (slow) implementation is of little use, but the optimized
	 * versions of the function relies so much on α and γ values used in
	 * **Grisu** algorithm. This implementation requires both α and γ be
	 * greater of equal, than the bit size of `diy_fp::mantissa_storage_type`
	 * to use the most optimal `digit_gen()` implementation.
	 *
	 * \tparam alpha α value of **Grisu** algorithm
	 * \tparam gamma γ value of **Grisu** algorithm
	 * \tparam CharType character type (typically `char` or `wchar_t`) of the
	 * output buffer \p **buffer**
	 *
	 * \param Mp **M+** value (right boundary)
	 * \param Mm **M-** value (left boundary)
	 * \param buffer character buffer to print the representation to
	 * \param len output parameter to return the length of the representation
	 * printed
	 * \param K output parameter to return **K** (decimal exponent) of the
	 * value
	 *
	 * \see `max_digits()`
	 * \see `max_buffer_size()`
	 * \see [Printing Floating-Point Numbers Quickly and Accurately with
	 * Integers]
	 * (http://florian.loitsch.com/publications/dtoa-pldi2010.pdf)
	 */
	template<int alpha, int gamma,
	typename FloatType, typename CharType>
	inline void digit_gen(const diy_fp<FloatType>& Mp, const diy_fp<FloatType>& Mm, CharType* buffer, int* len, int* K) noexcept
	{
		static_assert(static_cast<std::size_t>(constexpr_abs(alpha)) >= bit_size<typename diy_fp<FloatType>::mantissa_storage_type>() / 2 &&
			static_cast<std::size_t>(constexpr_abs(gamma)) >= bit_size<typename diy_fp<FloatType>::mantissa_storage_type>() / 2,
			"Current implementation supports only α and γ, which absolute values are equal or higher, "
			"than a half of integer mantissa bit size (typically 32) for performance reasons.");

		constexpr bool exponent_is_positive = alpha > 0 && gamma > 0;
		digit_gen_select<exponent_is_positive>::gen(Mp, Mm, buffer, len, K);
	}

	/** \brief **Grisu2** algorithm implementation.
	 *
	 * \tparam alpha α value of **Grisu** algorithm
	 * \tparam gamma γ value of **Grisu** algorithm
	 * \tparam FloatType type of input floating-point value (calculated by type
	 * of \p **v** parameter)
	 * \tparam CharType character type (typically `char` or `wchar_t`) of the
	 * output buffer \p **buffer**
	 *
	 * \param v floating point value to print
	 * \param buffer large enough character buffer to print to
	 * \param length output parameter to return the length of printed
	 * representation
	 * \param K output parameter to return **K** (decimal exponent) of the
	 * value
	 *
	 * \see `max_digits()`
	 * \see `max_buffer_size()`
	 * \see [Printing Floating-Point Numbers Quickly and Accurately with
	 * Integers]
	 * (http://florian.loitsch.com/publications/dtoa-pldi2010.pdf)
	 */
	template<int alpha, int gamma,
	typename FloatType, typename CharType> inline void grisu2(FloatType v, CharType* buffer, int* length, int* K) noexcept
	{
		static_assert(alpha <= gamma - 3,
			"It's imposed that γ ⩾ α + 3, since otherwise it's not always possible to find a proper decimal cached power");

		std::pair<diy_fp<FloatType>, diy_fp<FloatType>>&& w(diy_fp<FloatType>::boundaries(v));
		diy_fp<FloatType> &w_m(w.first), &w_p(w.second);

		const int mk = k_comp_exp<alpha, gamma>(w_p.exponent());
		const diy_fp<FloatType>& c_mk(cached_power<FloatType>(mk));

		w_m *= c_mk;
		w_p *= c_mk;

		++w_m;
		--w_p;

		*K = -mk;

		digit_gen<alpha, gamma>(w_p, w_m, buffer, length, K);
	}

	/** \brief Structure to hold Grisu algorithm parameters, **α** and **γ**. */
	struct parameters
	{
		int alpha, gamma;
	};

	/** \brief Template variable of Grisu algorithm parameters.
	 *
	 * Used to provide proper values of Grisu algorithm for the specified
	 * floating point type.
	 */
	template<typename FloatType> constexpr parameters grisu_parameters
	{
		-(int(bit_size<FloatType>() / 2 + 3)), // α
		-int(bit_size<FloatType>() / 2) // γ
	};
}

#endif // FLOAXIE_GRISU_H
